1 Setup

1.1 Background

Using the DALEX package to explore results from various models fitted with `caret`. Based on info from here.

1.2 Load data

1.3 Load results

# Restore the previously saved model fits from the results/ directory.
# Each object keeps the name of the file it was stored in.

# regression trees (cubist)
m_cubist_tu <- read_rds(file = "results/m_cubist_tu.Rds")

# two rf fits
m_rf_tu  <- read_rds(file = "results/m_rf_tu.Rds")
m_rf2_tu <- read_rds(file = "results/m_rf2_tu.Rds")

# xgb
m_xgb_tu <- read_rds(file = "results/m_xgb_tu.Rds")

2 The explain() function

# Wrap every fitted model in a DALEX explainer so the same diagnostics can be
# run on each of them. `label` is the name the model carries in DALEX output;
# `verbose = FALSE` silences the explainer's construction messages.
#
# NOTE(review): `training` presumably still contains the `outcome` column.
# DALEX recommends passing `data` without the target -- confirm whether the
# column should be dropped here.
explainer_cubist <- DALEX::explain(
  model = m_cubist_tu,
  data = training,
  y = training$outcome,
  label = "cubist",
  verbose = FALSE
)

explainer_rf1 <- DALEX::explain(
  model = m_rf_tu,
  data = training,
  y = training$outcome,
  label = "rf1",
  verbose = FALSE
)

explainer_rf2 <- DALEX::explain(
  model = m_rf2_tu,
  data = training,
  y = training$outcome,
  label = "rf2",
  verbose = FALSE
)

# The xgb explainer is given the separate predictor / response objects
# (`training_x`, `training_y`) instead of `training`.
explainer_xgb <- DALEX::explain(
  model = m_xgb_tu,
  data = training_x,
  y = training_y,
  label = "xgb",
  verbose = FALSE
)

3 Model performance

# Performance measures for the cubist explainer: store, then print.
mp_cubist <- model_performance(explainer_cubist)
mp_cubist
Measures for:  regression
mse        : 52.95139 
rmse       : 7.27677 
r2         : 0.9894604 
mad        : 3.198486

Residuals:
          0%          10%          20%          30%          40%          50% 
-37.13000488  -5.65989990  -3.19515381  -1.12048340  -0.08852539   0.83337402 
         60%          70%          80%          90%         100% 
  1.87998047   3.08745117   4.68081055   6.86838379  28.68078613 
# Performance measures for the first rf explainer: store, then print.
mp_rf1 <- model_performance(explainer_rf1)
mp_rf1
Measures for:  regression
mse        : 218.1711 
rmse       : 14.77061 
r2         : 0.9565747 
mad        : 7.988033

Residuals:
          0%          10%          20%          30%          40%          50% 
-69.97976667 -16.94812667  -8.53983333  -5.38518000  -2.63411333   0.01616667 
         60%          70%          80%          90%         100% 
  3.72900000   7.30695333  12.41252667  16.34528667  39.15060000 
# Performance measures for the second rf explainer: store, then print.
mp_rf2 <- model_performance(explainer_rf2)
mp_rf2
Measures for:  regression
mse        : 199.835 
rmse       : 14.1363 
r2         : 0.9602243 
mad        : 7.901833

Residuals:
         0%         10%         20%         30%         40%         50% 
-63.7121667 -16.7040667  -9.2787333  -4.0750000  -1.5991000   0.8626667 
        60%         70%         80%         90%        100% 
  4.4458000   7.7807667  11.5134667  16.0514333  33.9691667 
# Performance measures for the xgb explainer: store, then print.
mp_xgb <- model_performance(explainer_xgb)
mp_xgb
Measures for:  regression
mse        : 155.8606 
rmse       : 12.48441 
r2         : 0.9689771 
mad        : 7.642578

Residuals:
         0%         10%         20%         30%         40%         50% 
-36.3118896 -16.4187256  -9.4296875  -4.3185547  -2.4572266   0.5006104 
        60%         70%         80%         90%        100% 
  3.0805176   5.7313965  10.0908691  15.6971924  28.3033447 

4 Variable importance

# Variable importance for each explainer via model_parts(), scored with the
# same loss function (loss_root_mean_square) for every model.
vi_cubist <- model_parts(
  explainer = explainer_cubist,
  loss_function = loss_root_mean_square
)
vi_rf1 <- model_parts(
  explainer = explainer_rf1,
  loss_function = loss_root_mean_square
)
vi_rf2 <- model_parts(
  explainer = explainer_rf2,
  loss_function = loss_root_mean_square
)
vi_xgb <- model_parts(
  explainer = explainer_xgb,
  loss_function = loss_root_mean_square
)

5 Partial Dependence Plot

# Partial-dependence profiles (type = "partial") for two predictors, computed
# for every explainer.
#
# The argument is spelled `variables` in full: model_profile() has no
# `variable` formal, so the shorter spelling only worked through R's partial
# argument matching.
#
# NOTE(review): the second group assigns to the same pdp_* names as the first,
# so the "poodle" profiles are overwritten unless they are used (e.g. plotted)
# between the two groups -- confirm this is intended.

# Profiles for "poodle"
pdp_cubist <- model_profile(explainer_cubist, variables = "poodle", type = "partial")
pdp_rf1 <- model_profile(explainer_rf1, variables = "poodle", type = "partial")
pdp_rf2 <- model_profile(explainer_rf2, variables = "poodle", type = "partial")
pdp_xgb <- model_profile(explainer_xgb, variables = "poodle", type = "partial")

# Profiles for "staffordshire_bull_terrier"
pdp_cubist <- model_profile(explainer_cubist, variables = "staffordshire_bull_terrier", type = "partial")
pdp_rf1 <- model_profile(explainer_rf1, variables = "staffordshire_bull_terrier", type = "partial")
pdp_rf2 <- model_profile(explainer_rf2, variables = "staffordshire_bull_terrier", type = "partial")
pdp_xgb <- model_profile(explainer_xgb, variables = "staffordshire_bull_terrier", type = "partial")

6 Accumulated Local Effects plot

# Accumulated-local-effects profiles (type = "accumulated") for two
# predictors, computed for every explainer.
#
# The argument is spelled `variables` in full: model_profile() has no
# `variable` formal, so the shorter spelling only worked through R's partial
# argument matching.
#
# NOTE(review): the second group assigns to the same ale_* names as the first,
# so the "poodle" profiles are overwritten unless they are used (e.g. plotted)
# between the two groups -- confirm this is intended.

# Profiles for "poodle"
ale_cubist <- model_profile(explainer_cubist, variables = "poodle", type = "accumulated")
ale_rf1 <- model_profile(explainer_rf1, variables = "poodle", type = "accumulated")
ale_rf2 <- model_profile(explainer_rf2, variables = "poodle", type = "accumulated")
ale_xgb <- model_profile(explainer_xgb, variables = "poodle", type = "accumulated")

# Profiles for "staffordshire_bull_terrier"
ale_cubist <- model_profile(explainer_cubist, variables = "staffordshire_bull_terrier", type = "accumulated")
ale_rf1 <- model_profile(explainer_rf1, variables = "staffordshire_bull_terrier", type = "accumulated")
ale_rf2 <- model_profile(explainer_rf2, variables = "staffordshire_bull_terrier", type = "accumulated")
ale_xgb <- model_profile(explainer_xgb, variables = "staffordshire_bull_terrier", type = "accumulated")